rm(list=ls())

(WD <- getwd())
if (!is.null(WD)) setwd(WD)

indata0      = paste0(WD,"/_individual_data/_spfrawdata/","", collapse = NULL)
indata1      = paste0(WD,"/_average_data/_spfrawdata/","", collapse = NULL)
indata2      = paste0(WD,"/_individual_data/_otherrawdata/","", collapse = NULL)
indata3      = paste0(WD,"/_average_data/_otherrawdata/","", collapse = NULL)

library(haven)
library(AER)
library(sandwich)
library(lmtest)
library(pracma)
library(dplyr)
library(stargazer)
library(plm)
library(pracma)
library(DataCombine)
library(jtools)
library(plyr)
library(ggplot2)

out_nvar        = 13

load(paste(indata1,"us_spf_pgdp_avr.Rda",sep=""))
data_avr        = data
data_avr        = data[data_avr$qdate>=1970.00,]
data_avr        = data[data_avr$qdate<=2020.00,]

load(paste(indata0,"us_spf_pgdp_ind.Rda",sep=""))
data$ID         = data$id
data            = data[data$qdate>=1970.00,]
data            = data[data$qdate<=2020.00,]
data            = ddply(data,.(qdate),transform, cons = mean(f2,na.rm = TRUE))

######## Figure 1 (bottom table)   ############
ls_out_avr       = lm(fc_err ~ fc_rev, data=data_avr)
robust_out_avr   = sqrt(diag(vcovHC(ls_out_avr, type = "HC1")))
  
plm_out_rev      = plm(fc_err ~ fc_rev, data=data,index=c("ID", "qdate"), model="within")
double_out_rev   = sqrt(diag(vcovDC(plm_out_rev, type = "HC1")))

plm_out_cons     = plm(fc_err ~ cons, data=data,index=c("ID", "qdate"), model="within")
double_out_cons  = sqrt(diag(vcovDC(plm_out_cons, type = "HC1")))

plm_out_comb     = plm(fc_err ~ fc_rev + cons, data=data,index=c("ID", "qdate"), model="within")
double_out_comb  = sqrt(diag(vcovDC(plm_out_comb, type = "HC1")))

stderror = list(robust_out_avr, double_out_rev, double_out_cons, double_out_comb)

stargazer(ls_out_avr, plm_out_rev, plm_out_cons, plm_out_comb, 
          type = "text",
          se = stderror,
          out="_tables/figure_1_bottom.txt")


######## Table 1 ############
out_beta_avr  = rep(NA, out_nvar)
out_se_avr    = rep(NA, out_nvar)
out_beta_rev  = rep(NA, out_nvar)
out_se_rev    = rep(NA, out_nvar)
out_beta_cons = rep(NA, out_nvar)
out_se_cons   = rep(NA, out_nvar)

iter        = 0
data_series =  c('us_spf_pgdp_ind.rda','us_spf_cpi_ind.rda','us_spf_rgdp_ind.Rda','us_spf_pgdp_ind.rda','us_spf_cpi_ind.rda','us_spf_rgdp_ind.Rda', 'us_spf_pgdp_ind_h2.rda', 'us_spf_pgdp_ind_fin.rda', 'us_spf_pgdp_ind_nonfin.rda', 'ea_spf_cpi_ind.rda', 'ea_spf_rgdp_ind.rda', 'us_liv_cpi_ind.rda', 'us_liv_rgdp_ind.rda')
for ( i in data_series){
  iter = iter+1
  if (iter<=9){
  load(paste(indata0,i,sep=""))
  } else if (iter>=10) {
  load(paste(indata2,i,sep=""))
  }

  data$ID       = data$id
  data          = ddply(data,.(qdate),transform, cons = mean(f2,na.rm = TRUE))  
  if (iter<=3){
    data            = data[data$qdate>=1970,]
    data            = data[data$qdate<= 2020,]
  } else if (iter>=4 & iter<=6) {
    data            = data[data$qdate>=1992,]
    data            = data[data$qdate<= 2020,]
  } else if (iter == 7) {
    data            = data[data$qdate>=1970,]
    data            = data[data$qdate<= 2020,]
  } else if (iter>=8 & iter<=9) {
    data            = data[data$qdate>=1990,]  #industry data doesn't exist before '90
    data            = data[data$qdate<= 2020,]
  } else if (iter>=10 & iter<=11) {
    data            = data[data$qdate>=2000,]
    data            = data[data$qdate<= 2020,]
  } else if (iter>=12) {
    data            = data[data$qdate>=1993.25,]
    data            = data[data$qdate<= 2020,]
  }
  
  plm_all_rev     = plm(fc_err ~ fc_rev, data=data,index=c("ID", "qdate"), model="within")

  beta          = coefficients(plm_all_rev)
  std_error     = sqrt(diag(vcovDC(plm_all_rev, type = "HC1")))
  
  out_beta_rev[iter]   = as.numeric(beta[1])
  out_se_rev[iter]     = as.numeric(std_error[1])

  plm_all_cons   = plm(fc_err ~ cons, data=data,index=c("ID", "qdate"), model="within")
  
  beta          = coefficients(plm_all_cons)
  std_error     = sqrt(diag(vcovDC(plm_all_cons, type = "HC1")))
  
  out_beta_cons[iter]   = as.numeric(beta[1])
  out_se_cons[iter]     = as.numeric(std_error[1])
}

iter = 0
data_series =  c('us_spf_pgdp_avr.rda','us_spf_cpi_avr.rda','us_spf_rgdp_avr.Rda','us_spf_pgdp_ind.rda','us_spf_cpi_ind.rda','us_spf_rgdp_ind.Rda','us_spf_pgdp_ind_h2.rda', 'us_spf_pgdp_ind_fin.rda', 'us_spf_pgdp_ind_nonfin.rda', 'ea_spf_cpi_avr.rda', 'ea_spf_rgdp_avr.rda','us_liv_cpi_avr.rda', 'us_liv_rgdp_avr.rda')

for ( i in data_series){
  iter = iter+1
  if (iter<=3){
    load(paste(indata1,i,sep=""))
  } else if (iter>=4 & iter<=9) {
    load(paste(indata0,i,sep=""))
  } else if (iter>=10 & iter<=11) {
    load(paste(indata3,i,sep=""))
  } else if (iter>=12) {
    load(paste(indata3,i,sep=""))
  }
  
  if (iter<=3){
    data            = data[data$qdate>=1970,]
    data            = data[data$qdate<= 2020,]
  } else if (iter>=4 & iter<=6) {
    data            = ddply(data, .(qdate), summarize, fc_err = mean(fc_err, na.rm = TRUE), fc_rev = mean(fc_rev, na.rm = TRUE))
    data            = data[data$qdate>=1992,]
    data            = data[data$qdate<= 2020,]
  } else if (iter == 7) {
    data            = ddply(data, .(qdate), summarize, fc_err = mean(fc_err, na.rm = TRUE), fc_rev = mean(fc_rev, na.rm = TRUE))
    data            = data[data$qdate>=1970,]
    data            = data[data$qdate<= 2020,]
  } else if (iter>=8 & iter<=9) {
    data            = ddply(data, .(qdate), summarize, fc_err = mean(fc_err, na.rm = TRUE), fc_rev = mean(fc_rev, na.rm = TRUE))
    data            = data[data$qdate>=1990,]  #industry data doesn't exist before '90
    data            = data[data$qdate<= 2020,]
  } else if (iter>=10 & iter<=11) {
    data            = data[data$qdate>=2000,]
    data            = data[data$qdate<= 2020,]
  } else if (iter>=12) {
    data            = data[data$qdate<= 2020,]
  }
  
  lm_all_rev      = lm(fc_err ~ fc_rev, data=data)
  
  beta          = coefficients(lm_all_rev)
  std_error     = sqrt(diag(vcovHC(lm_all_rev, type = "HC1")))
  
  out_beta_avr[iter]   = as.numeric(beta[2])
  out_se_avr[iter]     = as.numeric(std_error[2])
}

all_surveys =  data.frame(out_beta_avr, out_se_avr, out_beta_rev,out_se_rev, out_beta_cons,out_se_cons)
rownames(all_surveys) = c("US SPF Deflator", "US SPF Inflation","US SPF Output", "US SPF Defl P92", "US SPF Infl P92","US SPF Out P92", "US SPF Infl H2", "US SPF Infl Fin", "US SPF Infl NonFin", "EA SPF Inflation", "EA SPF Output", 'US LIV Inflation', 'US LIV Output')
colnames(all_surveys) = c("beta avr", "se avr", "beta rev", "se rev", "beta cons", "se cons")

write.table(format(all_surveys,digits=3), file = "_tables/table_1.txt", sep = "\t")
